******************************************************
*This file cleans industry covariates from Census/ACS*
******************************************************

***Clean the Census data***
* For each Census/ACS sample year: load the IPUMS extract, restrict the sample,
* map IND1990 codes to industries, build person-level industry indicators,
* aggregate them to commuting zones, convert them to employment shares and
* save one file per year with the year appended to every share variable.
foreach year in 1970 1990 2000 2008 {

    * Load only the observations that belong to the current sample year
    use if year==`year' using "$raw_data_lmarket/ipums_census.dta", clear

    *Drop institutional group quarters*
    quietly: drop if gqtyped>=100 & gqtyped<=499
    *Drop alaska and hawaii*
    quietly: drop if statefip==2 | statefip==15

    *Restrict to working-age population who are salaried workers*
    keep if age>=25 & age<=64 & empstat==1 & (classwkrd==22 | classwkrd==23 )

    keep cntygp* puma statefip perwt multyear age sex empstat ind1990 classwkrd

    **recode the IND1990 to IFR crosspath**
    run "$project/xwalks/xwalks_industry/xpath_ind1990_ifr19.do"
    * List the IND1990 codes left without an industry, then drop them
    tab ind1990 if industry_ifr==""
    drop if industry_ifr==""

    ***Industry shares***
    * NOTE(review): the check above refers to industry_ifr while the indicators
    * below refer to industry. This presumably relies on the crosswalk creating
    * a variable named industry, or on Stata resolving industry as an
    * unambiguous variable abbreviation - confirm against the crosswalk do-file.

    * Manufacturing: every industry that is not in the list below
    gen ind_share_manufacturing=!inlist(industry, "services", "construction", "research", "utilities", "agriculture", "mining")
    * Manufacturing split by sex (sex==2 and sex==1 respectively)
    gen ind_share_manufacturing_f=ind_share_manufacturing*(sex==2)
    gen ind_share_manufacturing_m=ind_share_manufacturing*(sex==1)
    * Industrial: as manufacturing, but construction and mining are kept in
    gen ind_share_industrial=!inlist(industry, "services", "research", "utilities", "agriculture")
    gen ind_share_low_usage=inlist(industry, "paper", "textiles", "vehicles_other", "furniture") /*Industries with less than 1% of world stock*/
    gen ind_share_light_manuf=(industry=="textiles" | industry=="paper")
    * Heavy manufacturing is manufacturing net of the light industries above.
    * The two exclusions must be joined with & : joined with | the condition is
    * true for every observation and heavy would simply equal manufacturing.
    gen ind_share_heavy_manuf=(ind_share_manufacturing==1 & industry!="textiles" & industry!="paper")
    gen ind_share_construction=(industry=="construction")
    gen ind_share_mining=(industry=="mining")
    gen ind_share_utilities=(industry=="utilities")
    gen ind_share_agriculture=(industry=="agriculture")
    gen ind_share_research=(industry=="research")
    gen ind_share_services=(industry=="services")
    gen ind_share_cars=(industry=="automotive")

    * One unit of employment per person; summed below to form the denominator
    gen emp=1

    **Merge czones using geography xwalk**
    * Each branch sums the perwt-weighted counts within the year-specific
    * geographic unit, attaches commuting zones through the matching crosswalk
    * and stops if any unit is left without a czone.
    if  `year'==1970{
        gen ctygrp1970=cntygp97
        collapse (sum) emp ind_share_* [fw=perwt], by(ctygrp1970)  fast
        count if ctygrp1970!=.
        joinby ctygrp1970 using "$project/xwalks/xwalks_geography/ctygrp1970_czone.dta", unmatched(master)
        assert czone!=.
    }
    else if `year'==1990{
        * PUMA identifier combined with the state FIPS code
        gen puma1990=statefip*10000+puma
        collapse (sum) emp ind_share_* [fw=perwt], by(puma1990)  fast
        joinby puma1990 using "$project/xwalks/xwalks_geography/puma1990_czone.dta", unmatched(master)
        assert czone!=.
    }
    else if `year'==2000 | `year'==2008{
        * NOTE(review): 77777 looks like a special combined PUMA code that is
        * reassigned to PUMA 1801 so it can match the crosswalk - confirm.
        replace puma=1801 if puma==77777
        gen puma2000=statefip*10000+puma
        collapse (sum) emp ind_share_* [fw=perwt], by(puma2000)  fast
        joinby puma2000 using "$project/xwalks/xwalks_geography/puma2000_czone.dta", unmatched(master)
        assert czone!=.
    }

    **Aggregate at the czone level**
    * afac is not created in this file; presumably it is the allocation factor
    * supplied by the geography crosswalk - confirm.
    gen full_wt=afac
    collapse (sum) emp ind_share_* [iw=full_wt], by(czone) fast
    * Turn weighted counts into shares of czone employment and tag with the year
    foreach var of varlist ind_share_*{
        replace `var'=`var'/emp
        rename `var' `var'_`year'
    }
    drop emp

    save "$clean_data_lmarket/czone`year'_IndustryShares.dta", replace
}


